scipy.cluster.vq
聚类算法可用于信息论、目标检测、通信、压缩和其他领域。
vq 模块仅支持矢量量化和 k-means 算法。
k-means:该算法试图最小化观测值与质心之间的欧氏距离,并提供了几种初始化方法。
scipy.cluster.hierarchy
hierarchy(层次结构)模块提供了用于层次聚类和凝聚聚类的函数。其功能包括:从距离矩阵生成层次聚类、计算聚类的统计信息、剪切链接以生成平坦聚类,以及使用树状图可视化聚类。
# k-means demo: sample three 2-D Gaussian blobs, cluster them with kmeans2,
# and plot each cluster together with the fitted centroids.
import numpy as np  # required: every np.* call below raised NameError without it
from scipy.cluster.vq import kmeans2
import matplotlib.pyplot as plt

# Seed the global NumPy RNG so the sampled points (and the shuffle) are
# reproducible from run to run.
np.random.seed(12345678)

# Three blobs with different means and covariances: 45 + 30 + 25 = 100 points.
a = np.random.multivariate_normal([0, 6], [[2, 1], [1, 1.5]], size=45)
b = np.random.multivariate_normal([2, 0], [[1, -1], [-1, 3]], size=30)
c = np.random.multivariate_normal([6, 4], [[5, 0], [0, 1.2]], size=25)

# Stack into one (100, 2) array and shuffle the rows in place so the
# generating blob can no longer be read off the row order.
z = np.concatenate((a, b, c))
np.random.shuffle(z)

# Ask for 3 clusters; minit='points' picks the initial centroids from the data.
centroid, label = kmeans2(z, 3, minit='points')

# Split the observations by the cluster index kmeans2 assigned to each row.
w0 = z[label == 0]
w1 = z[label == 1]
w2 = z[label == 2]

# One marker style per cluster, centroids drawn as black stars on top.
_ = plt.plot(w0[:, 0], w0[:, 1], 'o', alpha=0.5, label='cluster 0')
_ = plt.plot(w1[:, 0], w1[:, 1], 'd', alpha=0.5, label='cluster 1')
_ = plt.plot(w2[:, 0], w2[:, 1], 's', alpha=0.5, label='cluster 2')
_ = plt.plot(centroid[:, 0], centroid[:, 1], 'k*', label='centroids')
_ = plt.axis('equal')  # equal scaling so distances are not visually distorted
_ = plt.legend(shadow=True)
_ = plt.show()
# Hierarchical clustering demo: Ward linkage cut into flat clusters, followed
# by a single-linkage dendrogram of the same observations.
from scipy.cluster import hierarchy
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist

# Twelve 2-D points, three near each corner of the square [0, 4] x [0, 4].
X = [[0, 0], [0, 1], [1, 0],
     [0, 4], [0, 3], [1, 4],
     [4, 0], [3, 0], [4, 1],
     [4, 4], [3, 4], [4, 3]]

# Condensed pairwise distance vector, shared by both linkage computations.
distances = pdist(X)

# Ward linkage, then cut the tree into at most t=2 flat clusters.
C = hierarchy.ward(distances)
# Keep and print the labels; as a bare expression the result was discarded.
flat_labels = hierarchy.fcluster(C, t=2, criterion='maxclust')
print(flat_labels)

_ = plt.figure()
# linkage() expects observations or a condensed distance vector. Passing the
# Ward linkage matrix C would cluster its rows instead of the points in X.
Z = hierarchy.linkage(distances, 'single')
dn = hierarchy.dendrogram(Z)
plt.show()
# k-means demo: sample three 2-D Gaussian blobs, cluster them with kmeans2,
# and plot each cluster together with the fitted centroids.
#
# NOTE: an earlier run of this cell failed at the np.random.seed(...) line with
# "NameError: name 'np' is not defined" because numpy was never imported.
# The import below fixes that.
import numpy as np
from scipy.cluster.vq import kmeans2
import matplotlib.pyplot as plt

# Seed the global NumPy RNG so the sampled points (and the shuffle) are
# reproducible from run to run.
np.random.seed(12345678)

# Three blobs with different means and covariances: 45 + 30 + 25 = 100 points.
a = np.random.multivariate_normal([0, 6], [[2, 1], [1, 1.5]], size=45)
b = np.random.multivariate_normal([2, 0], [[1, -1], [-1, 3]], size=30)
c = np.random.multivariate_normal([6, 4], [[5, 0], [0, 1.2]], size=25)

# Stack into one (100, 2) array and shuffle the rows in place so the
# generating blob can no longer be read off the row order.
z = np.concatenate((a, b, c))
np.random.shuffle(z)

# Ask for 3 clusters; minit='points' picks the initial centroids from the data.
centroid, label = kmeans2(z, 3, minit='points')

# Split the observations by the cluster index kmeans2 assigned to each row.
w0 = z[label == 0]
w1 = z[label == 1]
w2 = z[label == 2]

# One marker style per cluster, centroids drawn as black stars on top.
_ = plt.plot(w0[:, 0], w0[:, 1], 'o', alpha=0.5, label='cluster 0')
_ = plt.plot(w1[:, 0], w1[:, 1], 'd', alpha=0.5, label='cluster 1')
_ = plt.plot(w2[:, 0], w2[:, 1], 's', alpha=0.5, label='cluster 2')
_ = plt.plot(centroid[:, 0], centroid[:, 1], 'k*', label='centroids')
_ = plt.axis('equal')  # equal scaling so distances are not visually distorted
_ = plt.legend(shadow=True)
_ = plt.show()
# Hierarchical clustering demo: Ward linkage cut into flat clusters, followed
# by a single-linkage dendrogram of the same observations.
from scipy.cluster import hierarchy
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist

# Twelve 2-D points, three near each corner of the square [0, 4] x [0, 4].
X = [[0, 0], [0, 1], [1, 0],
     [0, 4], [0, 3], [1, 4],
     [4, 0], [3, 0], [4, 1],
     [4, 4], [3, 4], [4, 3]]

# Condensed pairwise distance vector, shared by both linkage computations.
distances = pdist(X)

# Ward linkage, then cut the tree into at most t=2 flat clusters.
C = hierarchy.ward(distances)
# Keep and print the labels; as a bare expression the result was discarded.
flat_labels = hierarchy.fcluster(C, t=2, criterion='maxclust')
print(flat_labels)

_ = plt.figure()
# linkage() expects observations or a condensed distance vector. Passing the
# Ward linkage matrix C would cluster its rows instead of the points in X.
Z = hierarchy.linkage(distances, 'single')
dn = hierarchy.dendrogram(Z)
plt.show()